from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import re
import pandas as pd

# Drive a Chrome session: open the portal home page, follow the '农业' link
# inside the '#sonnavhtml' element, and capture the resulting page's HTML
# into `html_code` for the regex extraction further down.
browser = webdriver.Chrome()
try:
    browser.maximize_window()
    url = 'http://www.kepu.gov.cn/www'
    browser.get(url)
    time.sleep(3)  # fixed pause; presumably lets the page finish rendering
    nav = browser.find_element(By.CSS_SELECTOR, '#sonnavhtml')
    nav.find_element(By.LINK_TEXT, '农业').click()
    time.sleep(3)  # fixed pause after the click before reading window handles
    # Switch to the last handle in the list. NOTE(review): this assumes the
    # link opened a new window/tab and that it is listed last -- confirm.
    handles = browser.window_handles
    browser.switch_to.window(handles[-1])
    html_code = browser.page_source
    # Optional debugging aid: dump the captured HTML to disk.
    # with open(file='kepu.txt', mode='w', encoding='utf-8') as f:
    #     f.write(html_code)
finally:
    # Always shut the browser down, even if navigation or element lookup
    # raised, so no Chrome/chromedriver process is left running.
    browser.quit()

# Regex patterns for the four fields scraped from the captured HTML.
# Title and link come from the same anchor inside a "media-heading" div;
# date and source come from their own <span> elements.
p_title = r'<div class="media-heading"><a href=".*?" target="_blank">(.*?)</a>'
p_link = r'<div class="media-heading"><a href="(.*?)" target="_blank">.*?</a>'
p_date = r'<span class="date">(.*?)</span>'
p_source = r'<span class="source">(.*?)</span>'

# Run every pattern over the page (re.S lets '.' span newlines) and drop the
# final match of each list in the same step.
# NOTE(review): the last match is discarded for all four fields -- presumably
# the trailing entry on the page is not a regular article; confirm.
title, link, date, source = (
    re.findall(pattern, html_code, re.S)[:-1]
    for pattern in (p_title, p_link, p_date, p_source)
)

# Assemble the four scraped lists into a table (one column per field) and
# write it to an Excel workbook without the index column.
data = pd.DataFrame(
    {
        '标题': title,
        '网址': link,
        '发布时间': date,
        '来源': source,
    }
)
data.to_excel('农业资讯(单页).xlsx', index=False)

